Bubble Chart

Hanna Rodrigues Ferreira

16 outubro 2021

# Load the OWID COVID-19 export and keep only the columns the chart needs,
# giving the long source column names short aliases in the same step.
data <- read.csv("owid-covid-data.csv") %>%
  select(
    continent,
    location,
    cases  = total_cases,
    deaths = total_deaths,
    vac1   = people_vaccinated,
    vac2   = people_fully_vaccinated,
    date,
    pop    = population
  ) %>%
  # Drop world/continent aggregates and other non-country entries; the
  # author's original note on this list reads "NA's".
  filter(!location %in% c(
    "World",
    "Asia",
    "Europe",
    "North America",
    "European Union",
    "South America",
    "Africa",
    "Oceania",
    "International",
    "Northern Cyprus"
  ))

# Quick structural check of the trimmed table.
glimpse(data)
## Rows: 112,772
## Columns: 8
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, ~
## $ location  <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghanis~
## $ cases     <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 11, 11, 11, ~
## $ deaths    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vac1      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vac2      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ date      <fct> 2020-02-24, 2020-02-25, 2020-02-26, 2020-02-27, 2020-02-28, ~
## $ pop       <dbl> 39835428, 39835428, 39835428, 39835428, 39835428, 39835428, ~
# Summary statistics of the selected columns while they still hold raw
# cumulative counts (before the per-population scaling further down).
summary(data)
##          continent            location          cases              deaths      
##               :    0   Argentina  :   630   Min.   :       1   Min.   :     1  
##  Africa       :30055   Mexico     :   630   1st Qu.:    1796   1st Qu.:    63  
##  Asia         :27371   Peru       :   630   Median :   16850   Median :   482  
##  Europe       :27795   Thailand   :   627   Mean   :  457126   Mean   : 11681  
##  North America:15185   Taiwan     :   615   3rd Qu.:  165550   3rd Qu.:  4085  
##  Oceania      : 5398   South Korea:   610   Max.   :42410607   Max.   :678407  
##  South America: 6968   (Other)    :109030   NA's   :5685       NA's   :16223   
##       vac1                vac2                   date       
##  Min.   :0.000e+00   Min.   :1.000e+00   2021-06-21:   219  
##  1st Qu.:1.559e+05   1st Qu.:7.920e+04   2021-06-22:   219  
##  Median :9.441e+05   Median :6.225e+05   2021-06-23:   219  
##  Mean   :8.855e+06   Mean   :5.664e+06   2021-06-24:   219  
##  3rd Qu.:4.753e+06   3rd Qu.:3.372e+06   2021-06-25:   219  
##  Max.   :1.101e+09   Max.   :1.022e+09   2021-06-26:   219  
##  NA's   :89772       NA's   :92634       (Other)   :111458  
##       pop           
##  Min.   :4.700e+01  
##  1st Qu.:1.933e+06  
##  Median :8.715e+06  
##  Mean   :4.099e+07  
##  3rd Qu.:2.967e+07  
##  Max.   :1.444e+09  
## 
# Impute, normalise and restrict the series to 2021.
#
# 1. Within each location, carry the last reported value forward so that
#    days without a report inherit the previous figure instead of NA.
#    NOTE(review): fill() works in row order, so this assumes the rows are
#    already sorted by date inside each location -- confirm against the CSV.
# 2. Grouping is needed only for the fill; it is released right after so the
#    later steps (and anything downstream) work on an ungrouped table.
# 3. Values still missing after the fill (nothing reported yet) are treated
#    as 0, then every indicator is expressed as a percentage of population.
# 4. Only rows dated in 2021 are kept; `pop` is dropped once it has served
#    as the denominator.
data <- data %>%
  group_by(location) %>%
  fill(cases, deaths, vac1, vac2, .direction = "down") %>%
  ungroup() %>%
  mutate(
    cases  = 100 * replace_na(cases, 0) / pop,
    deaths = 100 * replace_na(deaths, 0) / pop,
    vac1   = 100 * replace_na(vac1, 0) / pop,
    vac2   = 100 * replace_na(vac2, 0) / pop
  ) %>%
  filter(year(as_date(date)) == 2021) %>%
  select(-pop)

# Summary of the percentage-scaled 2021 data.
summary(data)
##          continent                    location         cases        
##               :    0   Afghanistan        :  264   Min.   : 0.0000  
##  Africa       :14348   Albania            :  264   1st Qu.: 0.1039  
##  Asia         :12634   Algeria            :  264   Median : 1.0669  
##  Europe       :13304   Andorra            :  264   Mean   : 2.8146  
##  North America: 8263   Angola             :  264   3rd Qu.: 4.7026  
##  Oceania      : 3883   Antigua and Barbuda:  264   Max.   :21.3093  
##  South America: 3235   (Other)            :54083                    
##      deaths              vac1              vac2                  date      
##  Min.   :0.000000   Min.   :  0.000   Min.   :  0.0000   2021-06-21:  219  
##  1st Qu.:0.001084   1st Qu.:  0.111   1st Qu.:  0.0000   2021-06-22:  219  
##  Median :0.012629   Median :  4.544   Median :  0.8798   2021-06-23:  219  
##  Mean   :0.050606   Mean   : 17.437   Mean   : 11.0644   2021-06-24:  219  
##  3rd Qu.:0.081345   3rd Qu.: 29.046   3rd Qu.: 13.5420   2021-06-25:  219  
##  Max.   :0.596731   Max.   :118.346   Max.   :117.1322   2021-06-26:  219  
##                                                          (Other)   :54353
# Structural check of the table after scaling and the 2021 filter.
glimpse(data)
## Rows: 55,667
## Columns: 7
## Groups: location [223]
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, ~
## $ location  <fct> Afghanistan, Afghanistan, Afghanistan, Afghanistan, Afghanis~
## $ cases     <dbl> 0.1318249, 0.1320081, 0.1323169, 0.1328190, 0.1330750, 0.133~
## $ deaths    <dbl> 0.005525232, 0.005550336, 0.005575439, 0.005598032, 0.005615~
## $ vac1      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ vac2      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ date      <fct> 2021-01-01, 2021-01-02, 2021-01-03, 2021-01-04, 2021-01-05, ~
# Sanity check: list every row that still has an NA in any checked column.
filter_all(data, any_vars(is.na(.)))
## continent location cases deaths vac1 vac2 date
# The 15 countries referred to by the chart title.
# Fix: the original list contained 'French', which is not a country name;
# it is replaced by 'France'.
# NOTE(review): presumably these are matched against the `location` column,
# so spellings should follow the data set -- confirm.
# NOTE(review): the variable shares its name with base::names(); calls such
# as names(x) still resolve to the function, and the name is kept so other
# code referring to this vector keeps working.
names <- c(
  "Brazil",
  "United States",
  "Canada",
  "Mexico",
  "Germany",
  "United Kingdom",
  "France",
  "Italy",
  "Spain",
  "Russia",
  "India",
  "South Korea",
  "China",
  "Japan",
  "Australia"
)


# Colour palette for the bubbles. The trailing note on each entry is the
# category the author intended it for. The vector is unnamed, so whatever
# consumes it relies on this exact order.
colors <- c(
  "#F28B30", # Asia (orange)
  "#BF0A3A", # Europe (red)
  "#022873", # North America (blue)
  "#F23D6D", # Oceania (pink)
  "gray",    # Others (gray)
  "#03A62C"  # South America (green)
)
# describe(data)
# NOTE(review): in the source, the call above was fused onto the end of the
# last palette comment, so it never executed; confirm whether it should run.
# Bubble chart: cases (x) against deaths (y), bubble size = vac2, all taken
# from `data` as prepared earlier in the script.
#
# Changes relative to the original chunk:
# * Locations that are not listed in `names` are recoded to "Others", so
#   only the listed countries keep their continent colour. Previously
#   `names` was never used and every country was coloured by continent.
# * The palette is given explicit names. Unnamed values are assigned in the
#   order of the categories present, which paired the colours with the
#   wrong continents (e.g. Africa received the first colour).
#   NOTE(review): the category order below follows the annotations on
#   `colors` (Asia, Europe, North America, Oceania, Others, South America);
#   confirm this is the intended pairing.
# * Title typo fixed: "GPD" -> "GDP".
p <- data %>%
  mutate(continent = if_else(location %in% names,
                             as.character(continent),
                             "Others")) %>%
  ggplot(aes(x = cases,
             y = deaths,
             size = vac2)) +
  geom_point(aes(color = continent), alpha = 0.6) +
  scale_size(range = c(.1, 24), name = "fully vaccinated") +
  scale_colour_manual(
    values = setNames(
      colors,
      c("Asia", "Europe", "North America", "Oceania", "Others",
        "South America")
    )
  ) +
  # Points outside these limits are dropped from the plot with a warning.
  xlim(-1, 20) +
  ylim(-0.07, .61) +
  theme_classic() +
  # Legend placed inside the panel, near the top-right corner.
  theme(legend.position = c(0.83, 0.86)) +
  # Hide the size legend; only the colour legend is shown.
  guides(size = "none") +
  labs(title = "COVID-19 vaccinations of top 15 GDP countries")

# Interactive version first, then the static ggplot.
ggplotly(p)
p
## Warning: Removed 31 rows containing missing values (geom_point).